"""Manage CKI pipelines."""
import copy
import os
import re
from time import sleep  # direct import for independent mocking
import uuid

import yaml

from . import gitlab
from . import misc
from .logger import get_logger

LOGGER = get_logger(__name__)


# pylint: disable=too-many-arguments,too-many-positional-arguments
def _reset_variable(variables, variable_overrides, name, message, *,
                    new_value=None, force=False):
    """Unless overridden on the command line, reset a variable.

    If the variable is present, it is either replaced by new_value or removed
    if new_value is None.

    If force is True, the variable will be set to new_value even if not present
    before.

    Args:
        variables:          dict of trigger variables, modified in place.
        variable_overrides: dict of command-line overrides (or None); a
                            variable listed there is never touched.
        name:               name of the variable to reset.
        message:            warning logged when the variable is modified.
        new_value:          replacement value, or None to remove the variable.
        force:              set the variable even if not present before.
    """
    if (name in variables or force) and name not in (variable_overrides or {}):
        LOGGER.warning(message)
        if new_value is not None:
            variables[name] = new_value
        else:
            # pop() instead of del: with force=True and new_value=None, the
            # variable might not be present, and del would raise KeyError
            variables.pop(name, None)


def _clean_production_vars_notifications(variables, variable_overrides=None):
    """Strip trigger variables that would cause email notifications.

    Pipelines marked with CKI_DEPLOYMENT_ENVIRONMENT != production should
    ideally not send emails at all; dropping the report rules makes doubly
    sure that no trigger variables with email addresses survive.
    """
    _reset_variable(
        variables, variable_overrides,
        'report_rules', 'Disabling report rules')


def _clean_production_vars_retrigger(variables, variable_overrides=None):
    """Sanitize variables related to the pipeline type + mark as retrigger.

    Mark pipelines as retriggered by setting CKI_RETRIGGER_PIPELINE: true.
    Make that obvious by adding 'Retrigger' to the pipeline commit
    message ("title"). Ideally, those pipelines will be skipped for anything
    where a production pipeline is required.

    Just to be sure, additionally mark the pipelines by modifying the email
    subject and pipeline type.
    """
    fallback_subject = (variables.get('subject')
                        or variables.get('title')
                        or 'Empty subject')

    # every one of these is force-set, and each reset logs which variable
    # marked the pipeline as retriggered
    retrigger_markers = (
        ('subject', f'Retrigger: {fallback_subject}'),
        ('title', f'Retrigger: {fallback_subject}'),
        ('CKI_DEPLOYMENT_ENVIRONMENT', 'staging'),
        ('CKI_RETRIGGER_PIPELINE', 'true'),
    )
    for marker_name, marker_value in retrigger_markers:
        _reset_variable(variables, variable_overrides, marker_name,
                        f'Marking as retriggered pipeline via {marker_name}',
                        new_value=marker_value, force=True)


def _clean_production_vars_beaker(variables, variable_overrides=None):
    """Sanitize variables related to beaker testing.

    Production pipelines are tested in Beaker. As retriggered pipelines are
    mostly for CI purposes, skip Beaker by default to preserve testing
    resources. Also force the job priority to normal so we're not blocking
    Beaker with random testing in case a test run in Beaker is actually needed.
    """
    _reset_variable(variables, variable_overrides, 'skip_beaker',
                    'Disabling beaker testing',
                    new_value='true', force=True)
    # log message typo fixed: 'Reseting' -> 'Resetting'
    _reset_variable(variables, variable_overrides, 'test_priority',
                    'Resetting job priority')


def _clean_production_vars(variables, variable_overrides=None):
    """Sanitize all variables."""
    # order matters: the retrigger step reads 'subject'/'title' that earlier
    # steps leave untouched, and all steps log in a deterministic order
    for sanitizer in (_clean_production_vars_notifications,
                      _clean_production_vars_beaker,
                      _clean_production_vars_retrigger):
        sanitizer(variables, variable_overrides)


def _clean_project_url(repository):
    """Create a clean GitLab project URL from a repository URL.

    This removes any trailing slashes or .git at the end, and adds a protocol if needed.

    This matches the behavior of git_clean_url in the pipeline.
    """
    repository = re.sub('.*://', '', repository)
    repository = re.sub(r'(.git)?/*$', '', repository)
    return f'https://{repository}'


def _configure_tests_only(gl_project, gl_original_pipeline, variables):
    """Reconfigure trigger variables to only run tests.

    This contains a lot of black magic and is deeply connected to the inner
    workings of the pipeline. Tread with care!
    """
    jobs_url = f'{gl_project.attributes["_links"]["self"]}/jobs'
    variables['skip_createrepo'] = 'true'
    variables['skip_merge'] = 'true'
    variables['skip_build'] = 'true'
    variables['skip_publish'] = 'true'
    variables['ARTIFACT_PIPELINE_ID'] = gl_original_pipeline.id

    gl_jobs = [j for j in gl_original_pipeline.jobs.list(iterator=True) if j.stage == 'publish']
    for gl_job in gl_jobs:
        arch = gl_job.name.split(maxsplit=1)[-1].replace(' ', '_')
        variables[f'ARTIFACT_URL_{arch}'] = f'{jobs_url}/{gl_job.id}/artifacts'
        variables[f'ARTIFACT_JOB_NAME_{arch}'] = gl_job.name
        variables[f'ARTIFACT_JOB_ID_{arch}'] = gl_job.id


def _commit_message(variables):
    """Create a commit message for a new CKI pipeline commit.

    The title is set to the title variable. The detailed commit message
    contains the trigger variables.
    """
    redacted_variables = ['mr_url']
    edited_variables = dict(variables.items())
    for var in redacted_variables:
        if var in variables:
            edited_variables[var] = '[REDACTED]'

    commit_message = [variables['title'], '']
    commit_message += [f'{key} = {value}'
                       for key, value in sorted(edited_variables.items())]

    return '\n'.join(commit_message)


# pylint: disable=too-many-branches
def _create_custom_configuration(gl_project, start_sha, variables):
    """Create a custom pipeline configuration by creating a new branch.

    If necessary, the pipeline configuration at the given start_sha will be
    modified to correspond to the
    {pipeline_definition,tree}_{repository,branch}_override variables.

    Returns the name of the new branch.
    """
    ci_config = yaml.safe_load(gl_project.files.raw('.gitlab-ci.yml', start_sha))

    # there must be exactly one include pulling in cki_pipeline.yml
    matching_includes = [entry for entry in ci_config['include']
                         if entry['file'] == 'cki_pipeline.yml']
    if len(matching_includes) != 1:
        raise Exception('No cki_pipeline.yml include')

    if 'pipeline_definition_branch_override' in variables:
        matching_includes[0]['ref'] = variables['pipeline_definition_branch_override']

    # a random branch name avoids collisions with concurrent retriggers
    branch_name = str(uuid.uuid4())
    gl_project.commits.create({
        'branch': branch_name,
        'start_sha': start_sha,
        'commit_message': _commit_message(variables),
        'actions': [{
            'action': 'update',
            'file_path': '.gitlab-ci.yml',
            'content': yaml.dump(ci_config),
        }]
    })
    return branch_name


def _create_commit(gl_project, variables):
    """Create a commit in the branch where the pipeline will run.

    If the branch does not exist, it will be created automatically if the
    PIPELINE_DEFINITION_URL environment variable is defined.

    Args:
        gl_project:      GitLab project to create commit in.
        variables:       Dictionary with all data to include in the message.

    Raises:
        Exception: if the branch is missing and PIPELINE_DEFINITION_URL is not
            defined, so the branch cannot be created automatically.
    """
    pipeline_branch = variables['cki_pipeline_branch']
    data = {
        'branch': pipeline_branch,
        'commit_message': _commit_message(variables),
        'actions': [],
    }
    try:
        gl_project.branches.get(pipeline_branch)
    except Exception:
        # branch lookup failed, presumably because the branch does not exist
        # (any other API error is treated the same way) -- bootstrap it from
        # 'main' with a .gitlab-ci.yml that includes the pipeline definition
        if 'PIPELINE_DEFINITION_URL' in os.environ:
            pipeline_definition_url = _clean_project_url(
                misc.get_env_var_or_raise('PIPELINE_DEFINITION_URL'))
            _, project_object = gitlab.parse_gitlab_url(pipeline_definition_url)
            pipeline_definition_project = project_object.path_with_namespace
            data['start_branch'] = 'main'
            # an existing .gitlab-ci.yml requires 'update' instead of 'create'
            has_gitlab_ci = any(
                f['name'] == '.gitlab-ci.yml'
                for f in gl_project.repository_tree(ref=data['start_branch'], iterator=True)
            )
            content = {
                'include': [
                    {
                        'project': pipeline_definition_project,
                        'ref': 'production',
                        'file': 'cki_pipeline.yml'
                    },
                ]
            }
            data['actions'].append({
                'action': 'update' if has_gitlab_ci else 'create',
                'file_path': '.gitlab-ci.yml',
                'content': yaml.dump(content)
            })
        else:
            raise Exception(f'Branch {pipeline_branch} missing, unable to create commit') from None
    gl_project.commits.create(data)


def pipeline_project(gl_instance, variables):
    """Return the appropriate GitLab project for a pipeline.

    If necessary, the variables are updated with a cki_project variable, which
    is built from the cki_pipeline_project trigger variable and the
    GITLAB_PARENT_PROJECT environment variable.

    Returns: GitLab project as returned by gl_instance.projects.get()
    """
    if 'cki_pipeline_project' in variables:
        # resolve the env var before popping so a missing GITLAB_PARENT_PROJECT
        # leaves the variables untouched
        parent_namespace = misc.get_env_var_or_raise('GITLAB_PARENT_PROJECT')
        child_project = variables.pop('cki_pipeline_project')
        variables['cki_project'] = f'{parent_namespace}/{child_project}'
    return gl_instance.projects.get(variables['cki_project'])


def trigger(gl_project, variables, *,
            variable_overrides=None,
            is_production=None,
            interactive=False,
            non_production_delay_s=30):
    """Trigger a CKI pipeline in gl_project.

    This is a slightly lower-level interface than trigger_multiple() or
    retrigger(). Please use them whenever possible.

    A commit will be created in the given pipeline branch, and a pipeline will
    be triggered for that branch. This is inherently racy and should never be
    done in parallel for the same branch!

    The trigger token needs to be valid for the given project. If no token is
    specified, the pipeline is triggered via the API.

    All variables are set as trigger variables for the pipeline.

    In addition to that, several variables must be defined for the triggering itself
    to work correctly:

    - cki_pipeline_branch: branch name of gl_project where the pipeline will be
      created; if the branch does not exist, it can be created automatically if
      the PIPELINE_DEFINITION_URL environment variable is defined.
    - title: title for the commit that gets created in cki_pipeline_branch

    If is_production is False, production variables are removed and a testing
    pipeline is triggered after non_production_delay_s seconds. If both
    is_production and interactive are True, the user is asked to explicitly
    confirm the requested pipeline. If is_production is True and interactive is
    False, a production pipeline is triggered directly.
    if is_production is None, production mode is determined based on
    misc.is_production().

    The variable_overrides dicts can be used to modify variables before they
    are used by the pipeline. Those overrides are ignored during variable
    cleaning, so be careful!
    """
    # work on a private copy with overrides applied and values stringified
    merged_variables = {**copy.deepcopy(variables), **(variable_overrides or {})}
    merged_variables = {key: str(value) for key, value in merged_variables.items()}

    if is_production is None:
        is_production = misc.is_production()

    if not is_production:
        _clean_production_vars(merged_variables, variable_overrides)
        sleep(non_production_delay_s)  # try to avoid collision with production deployment
    elif interactive:
        print('Trigger variables:')
        for key, value in merged_variables.items():
            print(f'  {key}: {value}')
        answer = input('Are you sure that you want to submit a new pipeline with '
                       'these variables (enter upper case yes)? ')
        if answer != 'YES':
            raise Exception('Aborting...')

    _create_commit(gl_project, merged_variables)
    return gl_project.pipelines.create({
        'ref': merged_variables['cki_pipeline_branch'],
        'variables': [{'key': key, 'value': value}
                      for key, value in merged_variables.items()],
    })


def trigger_multiple(gl_instance, pipelines, *,
                     extra_variables=None):
    """Trigger multiple CKI pipelines.

    See the documentation for trigger() for the peculiarities of triggering CKI
    pipelines.

    Production mode is determined based on misc.is_production().

    Args:
        gl_instance:     gitlab.Gitlab object representing GitLab instance.
        pipelines:       List of dictionaries describing the pipeline to
                         trigger. The dictionary contains pipeline variables.
        extra_variables: variable overrides to apply to each pipeline

    Returns: A list of triggered pipelines if no errors occurred.
    Raises: First exception that happened when triggering the pipelines.
    """
    errors = []
    gl_pipelines = []

    for index, variables in enumerate(pipelines):
        try:
            variables = copy.deepcopy(variables)
            variables.update(extra_variables or {})
            # pipeline_project() already returns the resolved project; the
            # previous extra projects.get() call was a redundant API round trip
            gl_project = pipeline_project(gl_instance, variables)
            gl_pipeline = trigger(gl_project, variables)
            gl_pipelines.append(gl_pipeline)

            LOGGER.info('Pipeline %d/%d for %s triggered: %s',
                        index + 1, len(pipelines), variables['cki_pipeline_branch'],
                        gl_pipeline.web_url)
        except Exception as exc:
            # collect errors so the remaining pipelines are still triggered
            errors.append(exc)
            LOGGER.exception(
                "Pipeline %d/%d for %s could not be triggered",
                index + 1,
                len(pipelines),
                variables.get("cki_pipeline_branch", "<Missing branch>"),
            )
    if errors:
        raise errors[0]

    return gl_pipelines


def _migrate_old_trigger_variables(variables):
    """Migrate old trigger variables.

    Sometimes, retriggered pipelines are based on old pipelines containing
    outdated trigger variables. This function is a place to put all those
    temporary hacks to migrate them so that those pipelines can run
    successfully with the current code.
    """
    needs_officialbuild = ('brew_task_id' in variables
                           and 'officialbuild' not in variables)
    if needs_officialbuild:
        LOGGER.warning('Inferring "officialbuild" from "scratch"')
        is_scratch = misc.strtobool(variables.get('scratch', 'true'))
        variables['officialbuild'] = misc.booltostr(not is_scratch)

    if 'AWS_UPT_LAUNCH_TEMPLATE_NAME' in variables:
        LOGGER.warning('Removing override of "AWS_UPT_LAUNCH_TEMPLATE_NAME"')
        variables.pop('AWS_UPT_LAUNCH_TEMPLATE_NAME')

    needs_tree_name = ('name' in variables
                       and 'kcidb_tree_name' not in variables)
    if needs_tree_name:
        LOGGER.warning('Inferring "kcidb_tree_name" from "name"')
        variables['kcidb_tree_name'] = variables['name']


def retrigger(gl_project, pipeline_id, *,
              variable_overrides=None,
              is_production=False,
              interactive=False,
              non_production_delay_s=0):
    # pylint: disable=too-many-locals
    """Retrigger an existing CKI pipeline.

    See the documentation for trigger() for the peculiarities of triggering
    CKI pipelines.

    For variable_overrides/is_production/interactive/non_production_delay_s,
    see trigger().

    Note that some of those parameters have different defaults here!

    For non-production retriggers, a temporary branch with a potentially
    customized pipeline configuration is created, and deleted again in all
    cases, even when triggering fails.

    Args:
        gl_project:         project as returned by gl_instance.projects.get()
        pipeline_id:        Pipeline ID as accepted by gl_project.pipelines.get(pipeline_id)

    Returns: the newly created pipeline.
    """
    gl_original_pipeline = gl_project.pipelines.get(pipeline_id)
    # start from the original pipeline's trigger variables
    variables = gitlab.get_variables(gl_original_pipeline)
    _migrate_old_trigger_variables(variables)
    variables['original_pipeline'] = gl_original_pipeline.web_url
    variables.update(variable_overrides or {})
    custom_branch = None
    try:
        if not is_production:
            # tests_only: reuse the original pipeline's published artifacts
            if misc.strtobool(variables.get('tests_only', 'False')):
                _configure_tests_only(gl_project, gl_original_pipeline, variables)
            # run on a temporary branch so the pipeline configuration can be
            # customized without touching the original branch
            custom_branch = _create_custom_configuration(
                gl_project, gl_original_pipeline.sha, variables)
            LOGGER.info('Created branch %s in %s', custom_branch, gl_project.web_url)
            variables['cki_pipeline_branch'] = custom_branch

        gl_new_pipeline = trigger(gl_project, variables,
                                  variable_overrides=variable_overrides,
                                  is_production=is_production,
                                  interactive=interactive,
                                  non_production_delay_s=non_production_delay_s)
        LOGGER.info('Pipeline for %s triggered: %s',
                    variables['cki_pipeline_branch'], gl_new_pipeline.web_url)
    finally:
        # always clean up the temporary branch, even when triggering fails
        if custom_branch:
            gl_project.branches.delete(custom_branch)
            LOGGER.info('Deleted branch %s in %s',
                        custom_branch, gl_project.web_url)
    return gl_new_pipeline


def _match(variables, key, value):
    """Check whether variables[key] satisfies a filter or list of filters.

    A filter of None matches a missing variable; a string filter must
    fullmatch the value; a {'not': regex} filter must not fullmatch it.
    """
    filter_values = list(misc.flattened(value))
    if key not in variables:
        # a missing variable is only matched by an explicit None filter
        return None in filter_values
    actual_value = variables[key]
    for filter_value in filter_values:
        if filter_value is None:
            continue
        if isinstance(filter_value, str):
            if re.fullmatch(filter_value, actual_value):
                return True
        elif isinstance(filter_value, dict):
            if not re.fullmatch(filter_value['not'], actual_value):
                return True
        else:
            raise Exception(f'Pipeline filters do not support datatype of {filter_value}')
    return False


def last_pipelines_for_branch(gl_project, ref, *,
                              variable_filter=None,
                              list_filter=None,
                              per_page=100,
                              max_count=5000,
                              pipeline_count=1):
    """Return the last pipelines of a certain branch.

    The variable_filter parameter can either be a single filter or a list of filters:
    - None: will only match if the variable is not present on the pipeline
    - 'str': will only match if the variable value matches the regex
    - {not: 'str'}: will only match if the variable value does not match the regex

    Regular expressions are matched according to re.fullmatch().

    The list_filter parameter is passed to gl_project.pipelines.list.

    At most max_count pipelines are examined (a falsy max_count disables the
    limit), and at most pipeline_count matching pipelines are returned.

    Returns an empty list if no such pipeline exists.
    """
    gl_pipelines = []
    variable_filter = variable_filter or {}
    for index, gl_pipeline in enumerate(gl_project.pipelines.list(
            iterator=True, ref=ref, per_page=per_page, **list_filter or {})):
        # stop after examining max_count pipelines; the previous '>' comparison
        # was an off-by-one that examined max_count + 1 of them
        if max_count and index >= max_count:
            return gl_pipelines
        variables = gitlab.get_variables(gl_pipeline)
        # ignore retriggered pipelines, or any other non-prod
        if (
            misc.strtobool(variables.get("CKI_RETRIGGER_PIPELINE", "false"))
            or variables.get("CKI_DEPLOYMENT_ENVIRONMENT", "production") != "production"
        ):
            continue
        if not all(_match(variables, key, value) for key, value in variable_filter.items()):
            continue
        gl_pipelines.append(gl_pipeline)
        if len(gl_pipelines) == pipeline_count:
            break

    return gl_pipelines


def last_pipeline_for_branch(gl_project, ref, **kwargs):
    """Return the last pipeline of a certain branch.

    Forwards to last_pipelines_for_branch.

    Returns None if no such pipeline exists.
    """
    matches = last_pipelines_for_branch(gl_project, ref, **kwargs)
    return matches[0] if matches else None


def last_successful_pipeline_for_branch(gl_project, ref, *,
                                        variable_filter=None):
    """Return the last successful pipeline of a certain branch.

    The variable_filter parameter can contain regular expressions to only
    return matching pipelines according to re.fullmatch().
    Filters equal to None will only match if the variable is not present on the
    pipeline.

    Returns None if no such pipeline exists.
    """
    return last_pipeline_for_branch(
        gl_project, ref,
        list_filter={"scope": "finished", "status": "success"},
        variable_filter=variable_filter or {})
